In [1]:
# import common packages
import altair as alt
import pandas as pd
import os
from toolz.curried import pipe
from vega_datasets import data
from altair import datum

Interactive Visualizations¶

Alan Lee¶


In [20]:
geo_race_urban_plot
Out[20]:

Loading and Processing Data:¶

In [2]:
# load dataset 
processed_data= pd.read_csv("../../data/processed/cleaned_race_data.csv", low_memory=True)
processed_data.head(5)

# allow larger data
alt.data_transformers.disable_max_rows()
Out[2]:
DataTransformerRegistry.enable('default')
In [3]:
# Create column voteshare = Democrats
processed_data['pref_dems'] = (processed_data['Voting_Preference'] == 'Democrat').astype(int)

processed_data['share_dems'] = processed_data.groupby(['Year_of_Study', 'Age'])['pref_dems'].transform('mean')
processed_data.tail(5)
Out[3]:
Year_of_Study Age Race Education Income_Group State_Code_FIPS State_Code Voting_Preference Urban birth_year pref_dems share_dems
31817 1952 33 White non-Hispanic Grade school or less 68 to 95 percentile 36 NY Democrat Suburban areas 1919 1 0.454545
31818 1952 48 White non-Hispanic High school 68 to 95 percentile 39 OH Republican Suburban areas 1904 0 0.250000
31819 1952 63 White non-Hispanic High school 34 to 67 percentile 36 NY Republican Suburban areas 1889 0 0.222222
31820 1952 25 Black non-Hispanic High school 17 to 33 percentile 26 MI Democrat Central cities 1927 1 0.428571
31821 1952 35 White non-Hispanic Grade school or less 68 to 95 percentile 39 OH Republican Central cities 1917 0 0.447368
In [4]:
# Create column voteshare = Republicans and drop working columns
processed_data['pref_repubs'] = (processed_data['Voting_Preference'] == 'Republican').astype(int)

processed_data['share_repubs'] = processed_data.groupby(['Year_of_Study', 'Age'])['pref_repubs'].transform('mean')

# processed_data = processed_data.drop(columns=['pref_repubs', 'pref_dems'])
In [5]:
processed_data['Year_of_Study'] = processed_data['Year_of_Study'].astype(int)
In [ ]:
 

Coding Visualizations Elements¶

In [6]:
# Create Chart Interactions

# slider to select year
slider = alt.binding_range(
    min=2004, max=2020, step=4, name='Year: '  # Range, step, and label
)

select_year = alt.selection_point(
    fields=['Year_of_Study'],   # Bound field
    bind=slider,                # Bind to slider control
    value=2020,                 # Default selected year
)

# slider to select birth year
select_birth_year = alt.selection_point(
    fields=['birth_year'],
    bind= alt.binding_range(min=1910, max=2020, step=4, name='Birth Year (Generation): '
),
    value=2000,
)


# drop down to select year
select_election_year = alt.selection_point(
    fields=['Year_of_Study'],
    bind= alt.binding_range(min=2000, max=2020, step=4, name='Election Year: '
),
    value=2020,
)

# drop down to select income
incomes = [None, '0 to 16 percentile',
       '17 to 33 percentile','34 to 67 percentile', '68 to 95 percentile', '96 to 100 percentile', 'DK; NA; refused to answer; no Pre IW',]
incomes_label =['All', '0 to 16 percentile',
       '17 to 33 percentile','34 to 67 percentile', '68 to 95 percentile', '96 to 100 percentile', "Don't know or refused"]
                        
income_dropdown = alt.binding_select(options=incomes, labels= incomes_label, name="Income Group: ", )
income_select = alt.selection_point(fields=['Income_Group'], bind=income_dropdown, 
                                    # value=[{'Income_Group': '0 to 16 percentile'}]
                                   )

# select year from legend
select_year_legend = alt.selection_point(
    fields=['Year_of_Study'],
    bind='legend',
)
In [7]:
# select States
# choropleth interactive elements
select_state = alt.selection_point(
    fields=['State_Code_FIPS'],
    empty='none'
)

# state drop down
# drop down to select income
states = [None, 'VA', 'OR', 'CA', 'ID', 'IA', 'LA', 'NE', 'NY', 'TN', 'AZ', 'MA',
       'FL', 'NC', 'NJ', 'AR', 'WI', 'PA', 'OK', 'TX', 'IL', 'KY', 'DC',
       'MD', 'KS', 'AL', 'MI', 'GA', 'MS', 'MN', 'CO', 'OH', 'IN', 'SC',
       'CT', 'MO', 'NM', 'WA', 'UT', 'HI', 'NV', 'AK', 'MT', 'NH', 'ME',
       'DE', 'VT', 'WV', 'ND', 'WY', 'RI', 'SD',]
states_label =['All', 'VA', 'OR', 'CA', 'ID', 'IA', 'LA', 'NE', 'NY', 'TN', 'AZ', 'MA',
       'FL', 'NC', 'NJ', 'AR', 'WI', 'PA', 'OK', 'TX', 'IL', 'KY', 'DC',
       'MD', 'KS', 'AL', 'MI', 'GA', 'MS', 'MN', 'CO', 'OH', 'IN', 'SC',
       'CT', 'MO', 'NM', 'WA', 'UT', 'HI', 'NV', 'AK', 'MT', 'NH', 'ME',
       'DE', 'VT', 'WV', 'ND', 'WY', 'RI', 'SD',]
                        
states_dropdown = alt.binding_select(options=states, labels= states_label, name="State: ", )
select_state_drop = alt.selection_point(fields=['State_Code'], bind=states_dropdown
                                   )
In [8]:
# drop down to select Education
educations = [None, 'College or advanced degree', 'High school', 'Some college',
       'Grade school or less', 'DK; NA']
education_labels =['All', 'College or advanced degree', 'High school', 'Some college',
       'Grade school or less', 'DK; NA']
                        
education_dropdown = alt.binding_select(options=educations, labels= education_labels, name="Level of Education: ", )
select_education = alt.selection_point(fields=['Education'], bind=education_dropdown,
                                   )
In [9]:
#create brush that grabs year or age info from x axis
brush = alt.selection_interval(
    encodings=['x'] 
)

Question 3.1 Plot Code¶

In [10]:
# generation vs vote share plot

loess = alt.Chart(processed_data).add_params(
    select_election_year,
    income_select
).transform_filter(
    alt.datum.Year_of_Study >=2000,
    alt.datum.Age >=18
).mark_circle(opacity=0.25, size = 50).encode(
    alt.X('birth_year:Q', scale=alt.Scale(zero=False), axis=alt.Axis(format='d'), title = "Year of Birth"),
    alt.Y('share_repubs', title = "Republican Vote Share"),
    opacity=alt.condition(income_select & brush & (select_election_year), alt.value(1), alt.value(0.05)),
    color=alt.when(income_select & brush & (select_election_year)).then('Year_of_Study:N', legend=alt.Legend(title='Election Year')).otherwise(alt.ColorValue("lightgray")),
    tooltip=[
        alt.Tooltip('State_Code:O', title='State'),
        alt.Tooltip('birth_year:Q', title='Year of Birth'),
        alt.Tooltip('Age:Q', title='Age at Election'),
        alt.Tooltip('Value:Q', title='Avg Rating', format='.2f'),
        alt.Tooltip('Year_of_Study:N', title='Election Year')
        ]
).add_params(
    brush
).properties(
    title = "Republican Vote Share by Generation"
)

generation_plot = loess+loess.transform_loess('birth_year', 'share_repubs', groupby=['Year_of_Study']).mark_line(size=4)
# generation_plot
In [11]:
# age and vote share plot
age_plot = alt.Chart(processed_data).transform_filter(
    alt.datum.Year_of_Study >=1990,
    alt.datum.Age >=18
).mark_circle(size = 30).add_params(
    brush, select_election_year, income_select
).encode(
    alt.X('Age', title = 'Age (years)'),
    alt.Y('share_repubs', title = "Republican Vote Share"),
    color=alt.when(income_select & brush & select_election_year).then("Year_of_Study:N", legend=alt.Legend(title='Election Year')).otherwise(alt.ColorValue("gainsboro")),
    opacity=alt.condition(income_select & brush & select_election_year, alt.value(1), alt.value(0.05)),
    tooltip=[
        alt.Tooltip('State_Code:O', title='State'),
        alt.Tooltip('birth_year:Q', title='Year of Birth'),
        alt.Tooltip('Age:Q', title='Age at Election'),
        alt.Tooltip('Value:Q', title='Avg Rating', format='.2f'),
        alt.Tooltip('Year_of_Study:N', title='Election Year')
        ]
).properties(
    title = "Republican Vote Share by Age"
)

# age_plot

Plot for Question 3.1¶

In [12]:
(age_plot| generation_plot)
Out[12]:

Figure 1: Republican Vote Share by Age and Republican Vote Share by Generation (Birth Year)
This plot shows the vote share trends and their relationship with one's age each election or the generation they were born in. Users can select change the Election year using the slider or select dfferent income group.

If one's generation was an important determinant of their vote choice, vote shares at each different election for the same generation would be similar. If age is an important determinant, there should be an association with increasing age and increasing republican vote share according to the theory.


Question 3.2 Plot Code¶

In [13]:
# On demand details plot for Republican Vote Share by Race

# simplify race labels
processed_data['Race'] = processed_data['Race'].replace({
    'Asian or Pacific Islander, non-Hispanic': 'Asian/Pacific Islander',
    'White non-Hispanic': 'White',
    'Black non-Hispanic': 'Black',
    'American Indian or Alaska Native, non-Hispanic': 'Native American',
    'Other or multiple races, non-Hispanic': 'Other or multiple races',
    'Non-white and non-black': 'Other or multiple races',
    'Missing': 'Missing',
    'Hispanic': 'Hispanic'
})
# reorder race so missing is last
race_order = [
    'Asian/Pacific Islander',
    'White',
    'Black',
    'Hispanic',
    'Native American',
    'Other or multiple races',
    'Missing'
]

# drop down to select race
races = [None, 'Asian/Pacific Islander', 'White', 'Hispanic', 'Black', 'Missing',
       'Native American', 'Other or multiple races',]
race_labels =['All', 'Asian/Pacific Islander', 'White', 'Hispanic', 'Black', 'Missing',
       'Native American', 'Other or multiple races']
                        
races_dropdown = alt.binding_select(options=races, labels= race_labels, name="Racial Group: ", )
select_race = alt.selection_point(fields=['Race'], bind=races_dropdown,
                                   )

# select year from legend
select_race_legend = alt.selection_point(
    fields=['Race'],
    bind='legend',
)

# select a point for which to provide details-on-demand
label = alt.selection_point(
    encodings=['x'], # limit selection to x-axis value
    on='mouseover',  # select on mouseover events
    nearest=True,    # select data point nearest the cursor
    empty='none'     # empty selection includes no data points
)

# define our base line chart of stock prices
base = alt.Chart(processed_data).mark_line().encode(
    alt.X('Year_of_Study:Q', scale=alt.Scale(zero=False), axis=alt.Axis(format='d'), title = "Election Year"),
    alt.Y('mean(share_repubs):Q', scale=alt.Scale(zero=False), title = "Republican Vote Share"),
    color=alt.when(select_race_legend).then('Race:N', legend=alt.Legend(title='Racial/Ethnic Group')).otherwise(alt.ColorValue("lightgray")),
    opacity=alt.condition(select_race_legend, alt.value(1), alt.value(0.1)),
).add_params(select_race_legend)

race_vote_plot = alt.layer(
    
    base, # base line chart
    
    # add a rule mark to serve as a guide line
    alt.Chart(processed_data).mark_rule(color='#aaa').encode(
        x='Year_of_Study:Q'
    ).transform_filter(label),
    
    # add circle marks for selected time points, hide unselected points
    base.mark_circle().encode(
        opacity=alt.condition(label, alt.value(1), alt.value(0))
    ).add_params(label),

    # add white stroked text to provide a legible background for labels
    base.mark_text(align='left', dx=7, dy=-7, stroke='white', strokeWidth=2).encode(
        text= alt.Text('mean(share_repubs):Q', format='.2f')
    ).transform_filter(label),
    
    # add text labels for stock prices
    base.mark_text(align='left', dx=7, dy=-5,).encode(
        text= alt.Text('mean(share_repubs):Q', format='.2f')
    ).transform_filter(label),
).properties(
    width=700,
    height=400,
    title = "Republican Vote Share by Racial or Ethnic Group"
)

Plot for Question 3.2¶

In [14]:
race_vote_plot
Out[14]:

Figure 2: Republican Vote Share by Racial or Ethnic Group
| This plot shows on demand detail of the vote share at each election. Select one ore more groups on the legend to highlight them for easier comparison

Question 3.3 Plot Code¶

In [15]:
# data manipulation specific to this part:

# simplify urban labels
processed_data['Urban'] = processed_data['Urban'].replace({
    'Central cities': 'Central cities',
    'Suburban areas': 'Suburban',
    'Rural, small towns, outlying and adjacent areas': 'Rural'
})

# remove missing urban and race values.
processed_data = processed_data[
    processed_data['Race'].notnull() &
    processed_data['Urban'].notnull()
]
# aggregate vote data for choropleth
agg_data = (
    processed_data
    .groupby('State_Code_FIPS')
    .agg({
        'share_dems': 'mean',
        'share_repubs': 'mean',
        'State_Code': 'first'
    })
    .reset_index()
)
In [16]:
# plot race, urban density, and republican vote plot
rural_bar_plot = alt.Chart(
    processed_data).mark_bar().transform_filter(
    select_state,
    select_state_drop
).encode(
    alt.X('Urban', axis=None),
    alt.Y('mean(share_repubs)'),
    alt.Column('Race',  header=alt.Axis(orient='bottom'), title=None, sort=race_order),
    alt.Color('Urban', title = 'Urband Density'),
).add_params(
    select_state,
    select_state_drop
).properties(
    title = "Republican Vote Share by Race and Urban Density",
    width = 100,
    height = 300
)
In [17]:
# plot geographic vote share 
us_choropleth = alt.Chart(alt.topo_feature(data.us_10m.url, 'states')).mark_geoshape(
    stroke='#aaa', strokeWidth=0.25
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(agg_data, key = 'State_Code_FIPS', fields = ['State_Code_FIPS', 'State_Code', 'share_dems', 'share_repubs', 'Year_of_Study', 'Income_Group'])
).encode(
    alt.Color('share_repubs:Q', scale=alt.Scale(scheme='viridis', reverse=True), title = 'Vote Share Republican'),
    tooltip=[
        alt.Tooltip('State_Code:N', title='State'),
        alt.Tooltip('share_repubs:Q', title='Mean Vote Share Republicans',format='.2f' )
    ],
    stroke=alt.condition(select_state, alt.value('red'), alt.value('white')),
    strokeWidth=alt.condition(select_state, alt.value(4), alt.value(1)),
    strokeOpacity=alt.condition(select_state, alt.value(4), alt.value(2)),
    opacity=alt.condition(select_state_drop, alt.value(1), alt.value(0.25)),
).project(
    type='albersUsa'
).add_params(
    select_state,
    select_state_drop
).properties(
    height = 600,
    width = 600,
    title = 'Average Republican Vote Shares Across all Elections'
)

# us_choropleth
In [18]:
geo_race_urban_plot = ((us_choropleth|rural_bar_plot).configure_view(
    step=30
).properties(
    title = "Republican Vote Share by Geography, Urban Density, and Race"
)
         )

Plot for Question 3.3¶

Figure 3: Click to select a state. Some states will not show all categories of information because the survey did not capture all demographics in all states.

In [ ]:
 
In [ ]:
 
In [ ]: